{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d05699a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4b51eab7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Series\n",
    "s1 = pd.Series(['a','b','c'],index=[1,2,3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "96fee5dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1    a\n",
      "2    b\n",
      "3    c\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(s1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "74ec9958",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "print(type(s1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "badf2207",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'a'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "653628c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "s2 = pd.Series({'k1':'v1','k2':'v2','k3':'v3'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b3fe9b25",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'v3'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s2['k3']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "779ae477",
   "metadata": {},
   "outputs": [],
   "source": [
    "dict1 = {\n",
    "    'name':['zhangSan','lisi','wangWu'],\n",
    "    'age':[20,22,21]\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "99a14f83",
   "metadata": {},
   "outputs": [],
   "source": [
    "# DataFrame\n",
    "df1 = pd.DataFrame(dict1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e665a7f5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       name  age\n",
      "0  zhangSan   20\n",
      "1      lisi   22\n",
      "2    wangWu   21\n"
     ]
    }
   ],
   "source": [
    "print(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b2ecebcf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "print(type(df1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ed0dd183",
   "metadata": {},
   "outputs": [],
   "source": [
    "data1 = df1['name']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "bdde7901",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    zhangSan\n",
      "1        lisi\n",
      "2      wangWu\n",
      "Name: name, dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(data1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0e771c0e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "print(type(data1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "8c451615",
   "metadata": {},
   "outputs": [],
   "source": [
    "data2 = df1[['name','age']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "720964a1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       name  age\n",
      "0  zhangSan   20\n",
      "1      lisi   22\n",
      "2    wangWu   21\n"
     ]
    }
   ],
   "source": [
    "print(data2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "95a97df6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "print(type(data2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "eec55f60",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>zhangSan</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>lisi</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>wangWu</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       name  age\n",
       "0  zhangSan   20\n",
       "1      lisi   22\n",
       "2    wangWu   21"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2.head() # 查看前五行数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "6890b927",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 读取文件\n",
    "stuDF = pd.read_csv('data/students.txt',header=None,names=['id','name','age','gender','clazz'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "e4ff1243",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>clazz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>施笑槐</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>吕金鹏</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1500100003</td>\n",
       "      <td>单乐蕊</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>理科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1500100004</td>\n",
       "      <td>葛德曜</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>理科三班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1500100005</td>\n",
       "      <td>宣谷芹</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>理科五班</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id name  age gender clazz\n",
       "0  1500100001  施笑槐   22      女  文科六班\n",
       "1  1500100002  吕金鹏   24      男  文科六班\n",
       "2  1500100003  单乐蕊   22      女  理科六班\n",
       "3  1500100004  葛德曜   24      男  理科三班\n",
       "4  1500100005  宣谷芹   22      女  理科五班"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stuDF.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "c30676a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "scoreDF = pd.read_csv('data/score.txt',header=None,names=['id','subject_id','score'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "bcc7b15e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>subject_id</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000001</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000002</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000003</td>\n",
       "      <td>137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000004</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000005</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id  subject_id  score\n",
       "0  1500100001     1000001     98\n",
       "1  1500100001     1000002      5\n",
       "2  1500100001     1000003    137\n",
       "3  1500100001     1000004     29\n",
       "4  1500100001     1000005     85"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scoreDF.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "50abadc9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1000 entries, 0 to 999\n",
      "Data columns (total 5 columns):\n",
      " #   Column  Non-Null Count  Dtype \n",
      "---  ------  --------------  ----- \n",
      " 0   id      1000 non-null   int64 \n",
      " 1   name    1000 non-null   object\n",
      " 2   age     1000 non-null   int64 \n",
      " 3   gender  1000 non-null   object\n",
      " 4   clazz   1000 non-null   object\n",
      "dtypes: int64(2), object(3)\n",
      "memory usage: 39.2+ KB\n"
     ]
    }
   ],
   "source": [
    "# 常见的操作\n",
    "stuDF.info() # 查看每一列的信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "dc538088",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.000000e+03</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.500101e+09</td>\n",
       "      <td>22.521000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.888194e+02</td>\n",
       "      <td>1.113013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.500100e+09</td>\n",
       "      <td>21.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.500100e+09</td>\n",
       "      <td>22.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.500101e+09</td>\n",
       "      <td>22.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.500101e+09</td>\n",
       "      <td>24.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.500101e+09</td>\n",
       "      <td>24.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id          age\n",
       "count  1.000000e+03  1000.000000\n",
       "mean   1.500101e+09    22.521000\n",
       "std    2.888194e+02     1.113013\n",
       "min    1.500100e+09    21.000000\n",
       "25%    1.500100e+09    22.000000\n",
       "50%    1.500101e+09    22.000000\n",
       "75%    1.500101e+09    24.000000\n",
       "max    1.500101e+09    24.000000"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stuDF.describe() # 查看数值型的列的一些统计信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "dc62d838",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>clazz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1500100016</td>\n",
       "      <td>潘访烟</td>\n",
       "      <td>23</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>1500100037</td>\n",
       "      <td>顾向松</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>1500100055</td>\n",
       "      <td>卫鸿熙</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>1500100069</td>\n",
       "      <td>束采波</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>1500100082</td>\n",
       "      <td>凌思菱</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>950</th>\n",
       "      <td>1500100951</td>\n",
       "      <td>平彭泽</td>\n",
       "      <td>22</td>\n",
       "      <td>男</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>956</th>\n",
       "      <td>1500100957</td>\n",
       "      <td>杜易云</td>\n",
       "      <td>21</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>972</th>\n",
       "      <td>1500100973</td>\n",
       "      <td>钮幼南</td>\n",
       "      <td>23</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>984</th>\n",
       "      <td>1500100985</td>\n",
       "      <td>申飞珍</td>\n",
       "      <td>21</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>985</th>\n",
       "      <td>1500100986</td>\n",
       "      <td>左天曼</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科一班</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>72 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id name  age gender clazz\n",
       "15   1500100016  潘访烟   23      女  文科一班\n",
       "36   1500100037  顾向松   22      女  文科一班\n",
       "54   1500100055  卫鸿熙   24      男  文科一班\n",
       "68   1500100069  束采波   22      女  文科一班\n",
       "81   1500100082  凌思菱   22      女  文科一班\n",
       "..          ...  ...  ...    ...   ...\n",
       "950  1500100951  平彭泽   22      男  文科一班\n",
       "956  1500100957  杜易云   21      女  文科一班\n",
       "972  1500100973  钮幼南   23      女  文科一班\n",
       "984  1500100985  申飞珍   21      女  文科一班\n",
       "985  1500100986  左天曼   22      女  文科一班\n",
       "\n",
       "[72 rows x 5 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stuDF[stuDF['clazz'] == '文科一班']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "1848bda5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 统计每个班级的人数\n",
    "resultDF1 = stuDF.groupby('clazz').agg('count')[['id','name']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "24616621",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = resultDF1.reset_index()[['clazz','id']].rename(columns={'id':'count'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c4bd35f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>clazz</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>文科一班</td>\n",
       "      <td>72</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>文科三班</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>文科二班</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>文科五班</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>文科六班</td>\n",
       "      <td>104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>文科四班</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>理科一班</td>\n",
       "      <td>78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>理科三班</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>理科二班</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>理科五班</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>理科六班</td>\n",
       "      <td>92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>理科四班</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   clazz  count\n",
       "0   文科一班     72\n",
       "1   文科三班     94\n",
       "2   文科二班     87\n",
       "3   文科五班     84\n",
       "4   文科六班    104\n",
       "5   文科四班     81\n",
       "6   理科一班     78\n",
       "7   理科三班     68\n",
       "8   理科二班     79\n",
       "9   理科五班     70\n",
       "10  理科六班     92\n",
       "11  理科四班     91"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "6ac37fcc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib\n",
    "zhfont= matplotlib.font_manager.FontProperties(fname=r'C:\\Windows\\Fonts\\FZYTK.TTF',size=10)\n",
    "sns.set(font=zhfont.get_name())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "e4351a4d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3wAAAJRCAYAAAAeUbV+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAofklEQVR4nO3dfXDddZ33/1fapmlqC23TUhi6XegiN7usaHGRgKvcLCpwVaCLdyORO3FhAF1BQWqdgqiMwjqiHYGFrkJ+uJSb9ecoiIt3Y6lLAUdAumyZwiIFofbG3lCkSZNz/aHksuZQ2pJzTvLJ4zHDDDnn5Jx33z3Jt8+cc3KaKpVKJQAAABRnRKMHAAAAoDYEHwAAQKEEHwAAQKEEHwAAQKEEHwAAQKEEHwAAQKEEHwAAQKFGNXqAgfC7321Kb6+3EwQAAIaXESOaMnHi617x/CKCr7e3IvgAAAD+jKd0AgAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFErwAQAAFGpUowcAGGp2mTA6Lc0tjR6jZjZ3b86GdV2NHgMAGACCD2AHtTS35PRvv6vRY9TMN066O4ngA4ASeEonAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoQQfAABAoeoSfLfcckuuuuqqJMny5csze/bszJo1KwsXLkySbNmyJRdffHFmzZqVj370o3nppZfqMRYAAEDRahp8a9euzcc+9rH867/+a99pV1xxRebOnZtbb701CxYsyOrVq3P33Xdn9OjR+e53v5vp06fn5ptvruVYAAAAw0JNg2/SpEm5+uqrc9555yVJent7s2zZssycOTOtra1pb2/P4sWLs3jx4hx11FFJkmOOOSaLFi2q5VgAAADDQl1fw7du3bqMHz++7+OJEydm9erVWbNmTSZNmrTVaQAAALw2o+p5Y5VKJa2trX0f9/b2plKpbHV6T09PKpXKDl1vW9u4AZ0TYLibMmX8q18IABj06hp8EyZMyKZNm/o+Xrt2bWbMmJHJkydnw4YNfae1tbXt0PWuWfNCent3LBIBdtZwiKFVqzY2egQAYDuMGNG0zQfA6vqUzpEjR2batGl56qmn0tXVlSVLlqS9vT2HHXZY3+v2fvazn+Xwww+v51gAAABFqvv78M2ZMyfz5s3Lhz70oXR0dGTq1Kk59thjs2bNmpx66qlZvnx5TjnllHqPBQAAUJymyo6+YG4Q8pROoJ6mTBmf07/9rkaPUTPfOOluT+kEgCFiUD2lEwAAgPoRfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUa1egBqL9Ju47OyNEtjR6jJnq6Nmft+q5GjwEAAIOC4BuGRo5uydNfPbnRY9TE9I/enkTwAQBA4imdAAAAxRJ8AAAAhRJ8AAAAhRJ8AAAAhRJ8AAAAhRJ8AAAAhRJ8AAAAhRJ8AAAAhfLG6wAAMIRM2vV1GTm63Mdterp6s3b9pkaPUQzBBwAAQ8jI0SPy1Feeb/QYNbPXP+/e6BGKUu6PBgAAAIY5wQcAAFAowQcAAFAor+EDAACGvEm7tmbk6HLzpqdrS9au//0Of165GwEAAIaNkaNHZeXV/9XoMWpm6sfad+rzPKUTAACgUIIPAACg
UIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUIIPAACgUKMaPQAMBhN2HZ3m0S2NHqNmurs2Z936rkaPAQBAnQk+SNI8uiU/WHBco8eomXeeeVcSwQcAMNx4SicAAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChBB8AAEChRjV6AGDw2nVCc0Y3j2n0GDXT1f1S1q/rbvQYAAA1I/iAVzS6eUyu63xno8eomX/q+EESwQcAlMtTOgEAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAol+AAAAAo1qtEDAFCG8RPGZExzc6PHqImXuruzcd1LjR4DAHZY3YOvUqnkU5/6VH7961/npZdeykUXXZTddtstF110Ubq7u3PKKafkfe97X73HAuA1GtPcnOO/fWWjx6iJO0/6ZDZG8AEw9NQ9+B577LH85je/yS233JIHHnggCxYsSHd3d+bOnZsDDjggJ5xwQo4++uhMnjy53qMBAAAUpe6v4Rs3blxeeOGFbNmyJevXr8+UKVOybNmyzJw5M62trWlvb8/ixYvrPRYAAEBx6v4I3/Tp03PUUUflmGOOSaVSyVe/+tU8+OCDfedPnDgxq1evrvdYAAAAxal78C1dujQrVqzIj3/84yxevDhXXnllWltb+87v7e1NpVLZoetsaxs30GMyhE2ZMr7RIwxK9lKdvVRnL/3ZCUD9+J5b3c7spe7Bt3jx4hx44IFpamrKIYcckk9/+tMZM2ZM3/lr167NjBkzdug616x5Ib29OxaJw1npX0CrVm3c4c8pfSeJvbwSe6nOXvrbmZ0A1ELp328Tx6FXUm0vI0Y0bfMBsLq/hm/q1Kl5/PHHkyQrVqzIXnvtlWnTpuWpp55KV1dXlixZkvb29nqPBQAAUJy6P8J37LHH5ic/+UnOPffcbNq0KRdccEHGjRuXefPmZfPmzeno6MjUqVPrPRYAAEBx6h58o0ePzle+8pV+p9944431HgUAAKBodQ8+AAD622XC2LQ0j2z0GDWzubsnG9a92OgxYNgRfAAAg0BL88h89NsrGj1GzXz1pL9o9AgwLNX9l7YAAABQH4IPAACgUJ7SCQA1Mn7CmIxpbm70GDXzUnd3Nq57qdFjALANgg8AamRMc3P+z+03N3qMmvneyR/Mxgg+gMHMUzoBAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKJfgAAAAKNarRA9TSpF3HZOTo5kaPUTM9Xd1Zu/6lRo8BAAAMUkUH38jRzVl1zf/X6DFqZso5pyQRfAAAQHWe0gkAAFAowQcAAFAowQcAAFAowQcAAFAowQcAAFAowQcAAFAowQcAAFAowQcAAFCoot94HQCAoW3ChNelubnMxyi6u3uzbt2mRo9B4QQfAACDVnPziHx/4epGj1ETx75vcqNHYBgo88clAAAACD4AAIBSCT4AAIBCCT4AAIBCCT4AAIBCCT4AAIBCCT4AAIBCCT4AAIBCeeN1AKCuxk9ozZjmcv8J8lL3lmxc9/tGjwGQRPABAHU2pnlUTrz9R40eo2b+/5OPzsZGDwHwR57SCQAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjB
BwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUCjBBwAAUKiGBF9nZ2fe//735/jjj89Pf/rTLF++PLNnz86sWbOycOHCRowEAABQnFH1vsEnnngid911V/793/89jz32WL71rW/lueeey9y5c3PAAQfkhBNOyNFHH53JkyfXezQAAICi1P0Rvp/+9Kd529velqampvz1X/91PvvZz2bZsmWZOXNmWltb097ensWLF9d7LAAAgOLU/RG+FStWpLe3N2eeeWZefPHFXHLJJRk/fnzf+RMnTszq1avrPRYAAEBx6h58PT092bJlSxYsWJCf//znufjii9Pa2tp3fm9vbyqVyg5dZ1vbuIEec8iYMmX8q19omLGT6uylOnupzl76s5Pq7KU6e6nOXvqzk+rspbqd2Uvdg2/SpEnZY489kiRvfvOb8+KLL6a3t7fv/LVr12bGjBk7dJ1r1ryQ3t7+kTgc7iirVm3c4c8pfS92Up29VGcv1dlLf3ZSnb1UZy/V2Ut/dlKdvVRXbS8jRjRt8wGwur+Gr729PUuXLk2SPPnkk5k+fXqmTZuWp556Kl1dXVmyZEna29vrPRYAAEBx6v4I36GHHpr/+q//yrnnnps1a9bkk5/8ZCZMmJB58+Zl8+bN6ejoyNSpU+s9FgAAQHHqHnxJ8vGPf7zfaTfeeGMDJgEAAChXQ954HQAAgNoTfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIUSfAAAAIXaruC79957q57+4IMPDugwAAAADJxR2zrzN7/5TZ599tn8y7/8S1paWrY6r6mpKZdeemm+973v1XRAAAAAds42g+/+++/Pd77znTzzzDP56le/utV5TU1NOfLII2s6HAAAADtvm8F34okn5sQTT8y1116bs88+u14zAQAAMAC2GXwv+8AHPpDvf//72bx5c7/zTjzxxIGeCQAAgAGwXcF3+umnZ7fddsvkyZO3Or2pqUnwAQAADFLbFXzjx4/PtddeW+tZAAAAGEDb9bYMhx56aO6///5azwIAAMAA2q5H+G677bbcdttt/U5vamrKj370owEfCgAAgNduu4Lvxz/+ca3nAAAAYIBtV/B997vffcXzZs2aNWDDAAAAMHC2K/h+/vOfb/VxT09Pli5dmj322EPwAQAADFLbFXxXXHFFv9O6uroyZ86cAR8IAACAgbFdwbdy5cp+p73wwgt58sknB3wgAAAABsZ2Bd8HPvCBfqe1trbm/e9//4APBAAAwMDwWzoBAAAKtV3BlyQ/+clP8tBDD2XTpk1pa2vL4Ycfnje84Q21nA0AAIDXYMT2XOhzn/tcbrjhhowbNy777bdfWlpacuWVV+ZrX/tarecDAABgJ23XI3y/+tWvsnDhwq1OO/300/PBD34w559/fk0GAwAA4LXZrkf4enp68vTTT2912jPPPJOenp6aDAUAAMBrt12P8J1//vn54Ac/mEmTJmXXXXfNxo0b89JLL+ULX/hCrecDAABgJ21X8E2ZMiWHHHJIzjrrrGzevDkPPPBAnnjiiYwbN67W8wEAALCTtiv4Lr300lx44YWZMWNGRo8enb333juPPPJIPvOZz+SWW26p9YwAAADshO16DV+SHHzwwRk9enSSZJdddslb3/rWjB07tmaDAQAA8Nps1yN873jHO3L88cfnsMMOS1tbW7q6uvLAAw/kyCOPrPV8AAAA7KTtCr4Pf/jD+Yd/+Ifcf//9WbduXdra2nLZZZdl3333rfV8AAAA7KTtCr4k2WuvvbLXXnvVcBQAAAAG0na/hg8AAIChRfABAAAUSvAB
AAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUqmHBt3Hjxrz1rW/NE088keXLl2f27NmZNWtWFi5c2KiRAAAAitKw4Js/f35GjhyZJLniiisyd+7c3HrrrVmwYEFWr17dqLEAAACK0ZDgW7p0aUaNGpXp06enUqlk2bJlmTlzZlpbW9Pe3p7Fixc3YiwAAICi1D34ent78/Wvfz3nnHNOkmTdunUZP3583/kTJ070CB8AAMAAGFXvG1y4cGGOPfbYjBs3LklSqVTS2trad35vb28qlcoOXWdb27gBnXEomTJl/KtfaJixk+rspTp7qc5e+rOT6uylOnupzl76s5Pq7KW6ndlL3YPvvvvuy3PPPZebbropy5cvz9e//vVs2rSp7/y1a9dmxowZO3Sda9a8kN7e/pE4HO4oq1Zt3OHPKX0vdlKdvVRnL9XZS392Up29VGcv1dlLf3ZSnb1UV20vI0Y0bfMBsLo/pfPqq6/OrbfemltvvTV/8zd/k7lz52batGl56qmn0tXVlSVLlqS9vb3eYwEAABSn7o/wVTNnzpzMmzcvmzdvTkdHR6ZOndrokQAAAIa8hgZfZ2dn3//feOONDZwEAACgPA17Hz4AAABqS/ABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUalS9b7CrqysXX3xxfv3rXydJLr/88rS0tOSiiy5Kd3d3TjnllLzvfe+r91gAAADFqXvw3XXXXWlqasp//Md/ZNGiRbnuuuuyadOmzJ07NwcccEBOOOGEHH300Zk8eXK9RwMAAChK3Z/S2dLSkne/+91JkgkTJmTjxo1ZtmxZZs6cmdbW1rS3t2fx4sX1HgsAAKA4dX+E79hjj+37/xtvvDHt7e15/vnn+06bOHFiVq9eXe+xAAAAilP34EuS3t7eXHbZZWltbc3s2bNz9913b3VepVLZoetraxs3
0CMOGVOmjG/0CIOOnVRnL9XZS3X20p+dVGcv1dlLdfbSn51UZy/V7cxe6h58lUolF110Udra2nLJJZekp6cnmzZt6jt/7dq1mTFjxg5d55o1L6S3t38kDoc7yqpVG3f4c0rfi51UZy/V2Ut19tKfnVRnL9XZS3X20p+dVGcv1VXby4gRTdt8AKzur+G755570tXVlUsuuSRJMnLkyEybNi1PPfVUurq6smTJkrS3t9d7LAAAgOLU/RG+Rx99NI8++mje+973Jkl23333zJkzJ/PmzcvmzZvT0dGRqVOn1nssAACA4tQ9+C644IJccMEF/U6/8cYb6z0KAABA0er+lE4AAADqQ/ABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUSvABAAAUatAE35e+9KXMmjUrp59+etauXdvocQAAAIa8QRF8v/zlL/M///M/+e53v5t3vOMdmT9/fqNHAgAAGPIGRfAtXrw4Rx11VJLkmGOOyaJFixo8EQAAwNA3qtEDJMmaNWsyY8aMJMnEiROzevXqHfr8ESOaXvm88a97TbMNdtv6s2/LyPFTBniSwWNndzJm3G4DPMngsrN7Gfe6qQM8yeCys3tpG2sv1ew2dpcBnmTw2PmdOA5Vs9vYMQM8yeCys3uZNHbkAE8yuOzsXlrHDorHKGpiZ3cyahf3laqfN75lgCcZXKrt5dV21VSpVCq1Gmh7XXrppXn729+eI488Ml1dXTnkkEPy0EMPNXosAACAIW1Q/Lhk8uTJWb9+fZLkd7/7Xdra2ho8EQAAwNA3KILvsMMO63vd3s9+9rMcfvjhDZ4IAABg6BsUT+lMkquuuiqPPvpokuTKK6/MlCnlvsYMAACgHgZN8AEAADCwBsVTOgEAABh4gg8AAKBQgg8AAKBQgg8Ahhgvv6/OXqqzl/7shB0x1O8voxo9wGDxu9/9LuvWrcvee+/9qpetVCppatr2O9qXwE6qs5fq7KW6Wuylt7c3W7ZsyejRowdixLpzX/l/enp6ctpppyVJWlpa8uSTT2avvfZKc3NzHnzwwfziF79I8oc9PPLII7njjjsyduzYHHPMMZkwYULuuuuuTJ8+PSeccMJO3X5vb28++clPZvXq1Xn961+f3//+9xk1alSefPLJJMmXv/zlhvzW7FrtZfHixbn22mv73d5b3vKWnHfeeX0fD7e93H333bn55pv73d7HPvaxvPnNb+77eDDupVY7ufHGG/PDH/4wK1euzC677JLW1ta+866//vqMGTMmyeDcyWtR+nGoVveXT3ziE3n66aezfv36TJw4Mb/97W8zderUjBo1Kp2d
nX2Xa9T9xW/p/KOurq6cdNJJufPOO/udN9wOtC8byJ0M9YPsnxrIvQzlg+yfG8i9lHSgrdX3lssvvzyXXHJJRo3q/3O7wb6XgdzJYD/Ibo+1a9fmkksuyVlnnZXbb789e+65Z6ZPn5758+fnnnvu2eqyp512Wj73uc9l2rRp+eIXv5ixY8e+puNQkjzzzDOZO3duvvnNb+bTn/503v3ud+fmm2/OOeeckwMOOOC1/vF2mr1UZy/91XInF1xwQU499dQcdNBBr3j7g3En9fhh0lA9DtXi/vKJT3wi//zP/5z58+fnrLPOyiWXXJIbbrgh55577lbHoqQx95dh/ZTOxx57LB0dHeno6MiZZ56Znp6evo/POOOMvss1NTXloIMOytNPP51TTjklBx98cG6//fbXfPsjRozIxz/+8TQ1NWXu3LlJkuOOOy4TJ07MnDlzGvKFUKudHH744ens7Oz335/GXjI4d5LUbi/vete7qu7lT2MvGX57OfXUU9PZ2ZkDDzwwn/nMZ7bazcuxlwy/vfyppqam/PrXv6563mDcSy138uUvfzlvetOb8vnPfz6TJ0/Odddd1+8yg3En1SxatCjf/va3c+2112b16tV9p69atSodHR1ZunRpLrjggvzoRz/KhRdeOKC3/cwzzwzo9Q0ke6nOXvob6J28+OKLVYOmmsG0k5EjR+bqq6/O2LFjc/bZZ+eQQw7Jm970phx33HGZNGlS3+WG03GomuHyNTSsg++AAw5IZ2dnPvzhD6ezszN33313xo4dm1NPPTX/9m//1ne50v7St8VOqrOX6mq9l6F6oK3FXlasWNEXSB0dHVm2bFkuvfTSvo9XrVpV9fMGy158Db2yRYsWZdWqVVm2bFn+/u//PieddFLOPvvsTJ48ue8yU6ZMSWdnZ7Zs2ZK5c+dm//33z9KlSwd0jp/+9Ke57777BvQ6Xwt7qc5e+qvlTiqVSt/TFj/ykY9k7dq1r3jZwbSTPzcQYTPUj0MvG45fQ8M6+F72wgsvpKurK8kfnmo0duzYrc4v7S99e9hJdfZSXa32MtQPtAO5l7/4i79IZ2dn5s+fnwMPPDCdnZ1bPfr5Sj8tHWx78TXU36GHHpq2trbss88+r/iPsiT57W9/mxdffDHXX399urq6ctNNN1W9vpf/AfdK//X09PRdtre3N5dddlkef/zxrFu3LpMnT85dd92V+++/P5/+9Kdf8R9w9TDQe3nZP/3TP/XbycqVK7e6zHDcy/YYrHupxU4efPDBdHR05KGHHsqcOXPS0dGR++67b6uvn2Tw7iQZ+LAp5ThUy6+h7u7ubb6usVH3F8GXP9yBH3jggSR/+MudPn16v8sMtwPtQO7kZUP9IJvUZi/bY7jtpYQDbVKb+8uGDRuyaNGiJMlzzz1X9TKDeS+1+hoarAfZ7dHc3JwRI0ZkwoQJ6ejo6PtH2cyZM7e63JIlS7L77rvnve99b7q6unLRRRdlw4YN+e///u88++yzfZd7+R9wr/TfyJEj+y47YsSIzJs3L/vuu2/OO++87LPPPjnuuONyyCGH5POf/3xDn3Y10Ht52XXXXddvJ1OnTt3qMsNxL9tjsO6lFjt585vfnM7Ozvzt3/5t5s+fX/V+kgzenSS1C5uhfhyq5ddQd3d3dt9991e87UbdXwRfkv333z933nln1q5dmy1btmTPPffsd5nhdqAdyJ28bKgfZJPa7GV7DLe9lHCgTWpzf9m0aVNfJK1cuTJr1qzpd5nBvJdafQ0N1oPsjthvv/1y9tln9328YMGCrc5vamrK3nvvnTFjxuT+++/Pj3/846xZsyZ77rln9thjjwGZYcuWLWlubh6Q6xoo9lKdvfRXi52MGjWq7/tDb29vXve6121zhsG0k1qFzVA/Dr1soO8vPT09fb8Yp7u7e7tmqNf9xdsyJBk9enTa2tpy/vnn57TTTqv662j//C991KhRWbNmTd74xjcW+Y3TTqqzl+pqtZehfKBNarOX
lStX5i1veUuS5MADD8xNN92Uj3/849ucYzDtpRY7GcwH2R1x55135pZbbsmzzz6blpaW3HPPPZk/f36SP7yVxRFHHJHbb789e+yxR/7u7/4uSbJ69ersuuuuGTHitf/89le/+lVWrVqVffbZ5zVf10Cyl+rspb9a7GTatGl95+255579nob+pwbjTpI/hM1+++2Xr33ta0lePWxKPw69bKDvL/fee286OjqSJJMmTXrVR0rreX8RfH+0//775/bbb8+uu+7a77zh+o3TTqqzl+pqsZcSDrQDvZfHHnssJ598ct91n3feeTnyyCPzxje+sertD8a9DPROBvNB9tWsWLEi11xzTfbff/8cdthhOf744/O1r32t36/9njhxYk1uf+XKlbn++uvz9NNP5/nnn8/b3va27LLLLjW5rR1hL9XZS3+13Mmjjz6aD33oQ0n+8L1p9uzZ/S4zGHfy5wY6bIbycaiW95f999+/74fUCxYsyIYNG7JkyZKtLtOo+4vgyx++ENavX5977rknF154YRYuXJjjjjsu73nPezJixIhh9Y3zZXZSnb1UV4u9lHCgHei9dHd3p6Wlpe8F9yNGjMhpp52WM844I7fddlv+6q/+Ksng3kst7iuD+SD7aqZNm5YvfOELSZJLL700TzzxRN8/ym6//fbMmTOnpu/j1dzcnD333DOdnZ1Zvnx5TjvttJxzzjn55S9/mWeeeSbXXXddQ556Vau9nHHGGa/4KPCfXudw2svy5ctz2WWXveL5f/oeYoNxL7W6r7z8Wydf/vOsW7cuEyZMyB133JF58+b1xctg3ElSu7AZ6sehWn7P/eIXv5iWlpa+j7ds2dLvN4036v4yrIOvq6sr3/ve93LQQQf13UGvueaafOc738njjz8+II/GbMtg/CZRq50M9YNsLfYy1A+ySe3uL0P9QFurvXz/+9/Pe97znq1OO+WUU/Lb3/42f/mXf9l32mDcSy2/3w7mg+yr+dOntF566aWvevlvfvObW318/vnnv6bbnzRpUj7ykY8kSd/rKa+55prXdJ0DoVZ7+dO3ANmW4bSXffbZp98bQ7+SwbiXWt1XhvJOktqFzVA+DiW1u79cddVV23X7jbq/NFUqlUrNbwUAAIC681s6AQAACiX4AAAACiX4AAAACiX4AGA7HXXUUXn++ecbPQYAbDfBBwAAUKhh/bYMALAtN9xwQ37wgx+kUqls9TYqGzZsyIUXXpjf//73qVQque666/Lwww/n2muvTZI8++yzeec735lKpZKlS5cmSR5++OHccccdef3rX9+QPwsAw5PgA4Aq/vd//zcLFy7MnXfemV/84hdbvYfbD3/4wxx22GE5/fTTc8UVV+Tee+/Nu971rhx++OHp6enJP/7jP+bkk0/ue8/Bhx9+OF/60pfEHgB1J/gAoIr7778/hx12WEaPHp2ZM2emra0tZ599dpJk9uzZSZIXXnghzz33XPbbb7++z7vjjjsyc+bMvthL/vDm8J/61Kfq+wcAgAg+AKhq3bp1aWtrS5K0tLRk33337Ttvw4YNueqqq1KpVLJhw4a+01988cV84xvfyM0339x32g9+8IPsscceecMb3lC/4QHgj/zSFgCoYvz48enq6kqSrF+/PnfddVffeVdeeWX222+/XH755dlzzz37Tr/hhhsye/bsTJo0KUnS3d2d+fPn54ILLqjv8ADwR4IPAKo4+OCD8+CDD6ZSqeSRRx7JokWL+s5bs2ZNpkyZkiRZsWJFKpVKVq5cmf/8z//Mqaee2ne5b33rWzniiCO2ikIAqCdP6QSAKvbbb78cccQROeOMM7Jp06ZcfvnlWbJkSZLkQx/6UD772c/mzjvvzKxZs/LlL385q1evzsaNG3PmmWcmSd7+9rfntttuy7hx4/LQQw8lSebOnbvV6/0AoNaaKpVKpdFDAAAAMPA8pRMAAKBQgg8AAKBQgg8AAKBQgg8AAKBQgg8AAKBQgg8AAKBQgg8AAKBQ
gg8AAKBQ/xd4w04R80+aqwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1080x720 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the per-class counts in df (x axis: clazz, y axis: count).\n",
    "plt.figure(figsize=(15, 10))\n",
    "sns.barplot(data=df, x='clazz', y='count')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "02a8328f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>clazz</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>文科六班</td>\n",
       "      <td>104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>文科三班</td>\n",
       "      <td>94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>理科六班</td>\n",
       "      <td>92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>理科四班</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>文科二班</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>文科五班</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>文科四班</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>理科二班</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>理科一班</td>\n",
       "      <td>78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>文科一班</td>\n",
       "      <td>72</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>理科五班</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>理科三班</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   clazz  count\n",
       "4   文科六班    104\n",
       "1   文科三班     94\n",
       "10  理科六班     92\n",
       "11  理科四班     91\n",
       "2   文科二班     87\n",
       "3   文科五班     84\n",
       "5   文科四班     81\n",
       "8   理科二班     79\n",
       "6   理科一班     78\n",
       "0   文科一班     72\n",
       "9   理科五班     70\n",
       "7   理科三班     68"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank the classes from the largest count to the smallest.\n",
    "df.sort_values(by='count', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "5df16da6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join preparation: total score per student.\n",
    "# Select 'score' before aggregating so that no other scoreDF column is summed\n",
    "# (summing every column and discarding the extras afterwards is wasted work and\n",
    "# can fail or warn on non-numeric columns, depending on the pandas version).\n",
    "df2 = scoreDF.groupby('id', as_index=False)['score'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "f2a3bff3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label the aggregated column as the per-student total.\n",
    "df2 = df2.rename({'score': 'sum_score'}, axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "b780627a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inner join: keep only the ids present in both stuDF and df2.\n",
    "df3 = stuDF.merge(df2, on='id', how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "cbf807c6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the joined table as CSV text.\n",
    "# NOTE(review): to_csv writes the row index as an unnamed first column by default;\n",
    "# confirm that is wanted, otherwise pass index=False.\n",
    "df3.to_csv(path_or_buf='data/sum_score.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "78707fc4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "22    271\n",
       "24    260\n",
       "23    235\n",
       "21    234\n",
       "Name: age, dtype: int64"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Frequency of each distinct age among the students.\n",
    "stuDF.loc[:, 'age'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "6a5c2324",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the total for id 1500101000 so that this id has no row left in df2.\n",
    "df2 = df2.loc[df2['id'].ne(1500101000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "706f4d1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left join: every stuDF row is kept; ids missing from df2 get NaN in the df2 columns.\n",
    "df4 = stuDF.merge(df2, how='left', left_on='id', right_on='id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "958657f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the age unit suffix. Any suffix already present is stripped first, so\n",
    "# re-running this cell does not keep stacking suffixes onto the same values.\n",
    "df4['age'] = df4['age'].astype(str).str.rstrip('岁') + '岁'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "a298e6b9",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>clazz</th>\n",
       "      <th>sum_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>施笑槐</td>\n",
       "      <td>22岁</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>406.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>吕金鹏</td>\n",
       "      <td>24岁</td>\n",
       "      <td>男</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>440.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1500100003</td>\n",
       "      <td>单乐蕊</td>\n",
       "      <td>22岁</td>\n",
       "      <td>女</td>\n",
       "      <td>理科六班</td>\n",
       "      <td>359.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1500100004</td>\n",
       "      <td>葛德曜</td>\n",
       "      <td>24岁</td>\n",
       "      <td>男</td>\n",
       "      <td>理科三班</td>\n",
       "      <td>421.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1500100005</td>\n",
       "      <td>宣谷芹</td>\n",
       "      <td>22岁</td>\n",
       "      <td>女</td>\n",
       "      <td>理科五班</td>\n",
       "      <td>395.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>1500100996</td>\n",
       "      <td>厉运凡</td>\n",
       "      <td>24岁</td>\n",
       "      <td>男</td>\n",
       "      <td>文科三班</td>\n",
       "      <td>355.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>1500100997</td>\n",
       "      <td>陶敬曦</td>\n",
       "      <td>21岁</td>\n",
       "      <td>男</td>\n",
       "      <td>理科六班</td>\n",
       "      <td>293.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>1500100998</td>\n",
       "      <td>容昆宇</td>\n",
       "      <td>22岁</td>\n",
       "      <td>男</td>\n",
       "      <td>理科四班</td>\n",
       "      <td>398.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>1500100999</td>\n",
       "      <td>钟绮晴</td>\n",
       "      <td>23岁</td>\n",
       "      <td>女</td>\n",
       "      <td>文科五班</td>\n",
       "      <td>371.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>1500101000</td>\n",
       "      <td>符瑞渊</td>\n",
       "      <td>23岁</td>\n",
       "      <td>男</td>\n",
       "      <td>理科六班</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id name  age gender clazz  sum_score\n",
       "0    1500100001  施笑槐  22岁      女  文科六班      406.0\n",
       "1    1500100002  吕金鹏  24岁      男  文科六班      440.0\n",
       "2    1500100003  单乐蕊  22岁      女  理科六班      359.0\n",
       "3    1500100004  葛德曜  24岁      男  理科三班      421.0\n",
       "4    1500100005  宣谷芹  22岁      女  理科五班      395.0\n",
       "..          ...  ...  ...    ...   ...        ...\n",
       "995  1500100996  厉运凡  24岁      男  文科三班      355.0\n",
       "996  1500100997  陶敬曦  21岁      男  理科六班      293.0\n",
       "997  1500100998  容昆宇  22岁      男  理科四班      398.0\n",
       "998  1500100999  钟绮晴  23岁      女  文科五班      371.0\n",
       "999  1500101000  符瑞渊  23岁      男  理科六班        NaN\n",
       "\n",
       "[1000 rows x 6 columns]"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8d122c6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
